{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# import the libraries\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [],
   "source": [
    "# import the data\n",
    "dataset = pd.read_csv('C:/Users/Susan/Desktop/50_Startups.csv')\n",
    "X = dataset.iloc[:, :-1].values\n",
    "y = dataset.iloc[:, 4].values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Encoding categorical variable\n",
    "from sklearn.preprocessing import LabelEncoder, OneHotEncoder\n",
    "labelencoder_X = LabelEncoder()\n",
    "X[:, 3] = labelencoder_X.fit_transform(X[:, 3])\n",
    "onehotencoder = OneHotEncoder(categorical_features = [3])\n",
    "X = onehotencoder.fit_transform(X).toarray()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Avoiding the dummy variable trap\n",
    "X = X[:, 1:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Split dataset into training and test\n",
    "from sklearn.cross_validation import train_test_split\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fitting multiple lineaar regression to the training set\n",
    "from sklearn.linear_model import LinearRegression\n",
    "regressor = LinearRegression()\n",
    "regressor.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Predicting the test set results\n",
    "y_pred = regressor.predict(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"simpletable\">\n",
       "<caption>OLS Regression Results</caption>\n",
       "<tr>\n",
       "  <th>Dep. Variable:</th>            <td>y</td>        <th>  R-squared:         </th> <td>   0.951</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Model:</th>                   <td>OLS</td>       <th>  Adj. R-squared:    </th> <td>   0.945</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Method:</th>             <td>Least Squares</td>  <th>  F-statistic:       </th> <td>   169.9</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Date:</th>             <td>Wed, 26 Jul 2017</td> <th>  Prob (F-statistic):</th> <td>1.34e-27</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Time:</th>                 <td>23:11:26</td>     <th>  Log-Likelihood:    </th> <td> -525.38</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>No. Observations:</th>      <td>    50</td>      <th>  AIC:               </th> <td>   1063.</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Df Residuals:</th>          <td>    44</td>      <th>  BIC:               </th> <td>   1074.</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Df Model:</th>              <td>     5</td>      <th>                     </th>     <td> </td>   \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Covariance Type:</th>      <td>nonrobust</td>    <th>                     </th>     <td> </td>   \n",
       "</tr>\n",
       "</table>\n",
       "<table class=\"simpletable\">\n",
       "<tr>\n",
       "    <td></td>       <th>coef</th>     <th>std err</th>      <th>t</th>      <th>P>|t|</th>  <th>[0.025</th>    <th>0.975]</th>  \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>const</th> <td> 5.013e+04</td> <td> 6884.820</td> <td>    7.281</td> <td> 0.000</td> <td> 3.62e+04</td> <td>  6.4e+04</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>x1</th>    <td>  198.7888</td> <td> 3371.007</td> <td>    0.059</td> <td> 0.953</td> <td>-6595.030</td> <td> 6992.607</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>x2</th>    <td>  -41.8870</td> <td> 3256.039</td> <td>   -0.013</td> <td> 0.990</td> <td>-6604.003</td> <td> 6520.229</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>x3</th>    <td>    0.8060</td> <td>    0.046</td> <td>   17.369</td> <td> 0.000</td> <td>    0.712</td> <td>    0.900</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>x4</th>    <td>   -0.0270</td> <td>    0.052</td> <td>   -0.517</td> <td> 0.608</td> <td>   -0.132</td> <td>    0.078</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>x5</th>    <td>    0.0270</td> <td>    0.017</td> <td>    1.574</td> <td> 0.123</td> <td>   -0.008</td> <td>    0.062</td>\n",
       "</tr>\n",
       "</table>\n",
       "<table class=\"simpletable\">\n",
       "<tr>\n",
       "  <th>Omnibus:</th>       <td>14.782</td> <th>  Durbin-Watson:     </th> <td>   1.283</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Prob(Omnibus):</th> <td> 0.001</td> <th>  Jarque-Bera (JB):  </th> <td>  21.266</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Skew:</th>          <td>-0.948</td> <th>  Prob(JB):          </th> <td>2.41e-05</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Kurtosis:</th>      <td> 5.572</td> <th>  Cond. No.          </th> <td>1.45e+06</td>\n",
       "</tr>\n",
       "</table>"
      ],
      "text/plain": [
       "<class 'statsmodels.iolib.summary.Summary'>\n",
       "\"\"\"\n",
       "                            OLS Regression Results                            \n",
       "==============================================================================\n",
       "Dep. Variable:                      y   R-squared:                       0.951\n",
       "Model:                            OLS   Adj. R-squared:                  0.945\n",
       "Method:                 Least Squares   F-statistic:                     169.9\n",
       "Date:                Wed, 26 Jul 2017   Prob (F-statistic):           1.34e-27\n",
       "Time:                        23:11:26   Log-Likelihood:                -525.38\n",
       "No. Observations:                  50   AIC:                             1063.\n",
       "Df Residuals:                      44   BIC:                             1074.\n",
       "Df Model:                           5                                         \n",
       "Covariance Type:            nonrobust                                         \n",
       "==============================================================================\n",
       "                 coef    std err          t      P>|t|      [0.025      0.975]\n",
       "------------------------------------------------------------------------------\n",
       "const       5.013e+04   6884.820      7.281      0.000    3.62e+04     6.4e+04\n",
       "x1           198.7888   3371.007      0.059      0.953   -6595.030    6992.607\n",
       "x2           -41.8870   3256.039     -0.013      0.990   -6604.003    6520.229\n",
       "x3             0.8060      0.046     17.369      0.000       0.712       0.900\n",
       "x4            -0.0270      0.052     -0.517      0.608      -0.132       0.078\n",
       "x5             0.0270      0.017      1.574      0.123      -0.008       0.062\n",
       "==============================================================================\n",
       "Omnibus:                       14.782   Durbin-Watson:                   1.283\n",
       "Prob(Omnibus):                  0.001   Jarque-Bera (JB):               21.266\n",
       "Skew:                          -0.948   Prob(JB):                     2.41e-05\n",
       "Kurtosis:                       5.572   Cond. No.                     1.45e+06\n",
       "==============================================================================\n",
       "\n",
       "Warnings:\n",
       "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
       "[2] The condition number is large, 1.45e+06. This might indicate that there are\n",
       "strong multicollinearity or other numerical problems.\n",
       "\"\"\""
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Building the optimal model using backward elimination\n",
    "import statsmodels.formula.api as sm\n",
    "X = np.append(arr = np.ones((50, 1)).astype(int), values = X, axis = 1)\n",
    "X_opt = X[:, [0, 1, 2, 3, 4, 5]]\n",
    "regressor_OLS = sm.OLS(endog = y, exog = X_opt).fit()\n",
    "regressor_OLS.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"simpletable\">\n",
       "<caption>OLS Regression Results</caption>\n",
       "<tr>\n",
       "  <th>Dep. Variable:</th>            <td>y</td>        <th>  R-squared:         </th> <td>   0.951</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Model:</th>                   <td>OLS</td>       <th>  Adj. R-squared:    </th> <td>   0.946</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Method:</th>             <td>Least Squares</td>  <th>  F-statistic:       </th> <td>   217.2</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Date:</th>             <td>Wed, 26 Jul 2017</td> <th>  Prob (F-statistic):</th> <td>8.49e-29</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Time:</th>                 <td>23:14:24</td>     <th>  Log-Likelihood:    </th> <td> -525.38</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>No. Observations:</th>      <td>    50</td>      <th>  AIC:               </th> <td>   1061.</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Df Residuals:</th>          <td>    45</td>      <th>  BIC:               </th> <td>   1070.</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Df Model:</th>              <td>     4</td>      <th>                     </th>     <td> </td>   \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Covariance Type:</th>      <td>nonrobust</td>    <th>                     </th>     <td> </td>   \n",
       "</tr>\n",
       "</table>\n",
       "<table class=\"simpletable\">\n",
       "<tr>\n",
       "    <td></td>       <th>coef</th>     <th>std err</th>      <th>t</th>      <th>P>|t|</th>  <th>[0.025</th>    <th>0.975]</th>  \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>const</th> <td> 5.011e+04</td> <td> 6647.870</td> <td>    7.537</td> <td> 0.000</td> <td> 3.67e+04</td> <td> 6.35e+04</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>x1</th>    <td>  220.1585</td> <td> 2900.536</td> <td>    0.076</td> <td> 0.940</td> <td>-5621.821</td> <td> 6062.138</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>x2</th>    <td>    0.8060</td> <td>    0.046</td> <td>   17.606</td> <td> 0.000</td> <td>    0.714</td> <td>    0.898</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>x3</th>    <td>   -0.0270</td> <td>    0.052</td> <td>   -0.523</td> <td> 0.604</td> <td>   -0.131</td> <td>    0.077</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>x4</th>    <td>    0.0270</td> <td>    0.017</td> <td>    1.592</td> <td> 0.118</td> <td>   -0.007</td> <td>    0.061</td>\n",
       "</tr>\n",
       "</table>\n",
       "<table class=\"simpletable\">\n",
       "<tr>\n",
       "  <th>Omnibus:</th>       <td>14.758</td> <th>  Durbin-Watson:     </th> <td>   1.282</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Prob(Omnibus):</th> <td> 0.001</td> <th>  Jarque-Bera (JB):  </th> <td>  21.172</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Skew:</th>          <td>-0.948</td> <th>  Prob(JB):          </th> <td>2.53e-05</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Kurtosis:</th>      <td> 5.563</td> <th>  Cond. No.          </th> <td>1.40e+06</td>\n",
       "</tr>\n",
       "</table>"
      ],
      "text/plain": [
       "<class 'statsmodels.iolib.summary.Summary'>\n",
       "\"\"\"\n",
       "                            OLS Regression Results                            \n",
       "==============================================================================\n",
       "Dep. Variable:                      y   R-squared:                       0.951\n",
       "Model:                            OLS   Adj. R-squared:                  0.946\n",
       "Method:                 Least Squares   F-statistic:                     217.2\n",
       "Date:                Wed, 26 Jul 2017   Prob (F-statistic):           8.49e-29\n",
       "Time:                        23:14:24   Log-Likelihood:                -525.38\n",
       "No. Observations:                  50   AIC:                             1061.\n",
       "Df Residuals:                      45   BIC:                             1070.\n",
       "Df Model:                           4                                         \n",
       "Covariance Type:            nonrobust                                         \n",
       "==============================================================================\n",
       "                 coef    std err          t      P>|t|      [0.025      0.975]\n",
       "------------------------------------------------------------------------------\n",
       "const       5.011e+04   6647.870      7.537      0.000    3.67e+04    6.35e+04\n",
       "x1           220.1585   2900.536      0.076      0.940   -5621.821    6062.138\n",
       "x2             0.8060      0.046     17.606      0.000       0.714       0.898\n",
       "x3            -0.0270      0.052     -0.523      0.604      -0.131       0.077\n",
       "x4             0.0270      0.017      1.592      0.118      -0.007       0.061\n",
       "==============================================================================\n",
       "Omnibus:                       14.758   Durbin-Watson:                   1.282\n",
       "Prob(Omnibus):                  0.001   Jarque-Bera (JB):               21.172\n",
       "Skew:                          -0.948   Prob(JB):                     2.53e-05\n",
       "Kurtosis:                       5.563   Cond. No.                     1.40e+06\n",
       "==============================================================================\n",
       "\n",
       "Warnings:\n",
       "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
       "[2] The condition number is large, 1.4e+06. This might indicate that there are\n",
       "strong multicollinearity or other numerical problems.\n",
       "\"\"\""
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_opt = X[:, [0, 1, 3, 4, 5]]\n",
    "regressor_OLS = sm.OLS(endog = y, exog = X_opt).fit()\n",
    "regressor_OLS.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"simpletable\">\n",
       "<caption>OLS Regression Results</caption>\n",
       "<tr>\n",
       "  <th>Dep. Variable:</th>            <td>y</td>        <th>  R-squared:         </th> <td>   0.951</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Model:</th>                   <td>OLS</td>       <th>  Adj. R-squared:    </th> <td>   0.948</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Method:</th>             <td>Least Squares</td>  <th>  F-statistic:       </th> <td>   296.0</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Date:</th>             <td>Wed, 26 Jul 2017</td> <th>  Prob (F-statistic):</th> <td>4.53e-30</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Time:</th>                 <td>23:16:49</td>     <th>  Log-Likelihood:    </th> <td> -525.39</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>No. Observations:</th>      <td>    50</td>      <th>  AIC:               </th> <td>   1059.</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Df Residuals:</th>          <td>    46</td>      <th>  BIC:               </th> <td>   1066.</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Df Model:</th>              <td>     3</td>      <th>                     </th>     <td> </td>   \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Covariance Type:</th>      <td>nonrobust</td>    <th>                     </th>     <td> </td>   \n",
       "</tr>\n",
       "</table>\n",
       "<table class=\"simpletable\">\n",
       "<tr>\n",
       "    <td></td>       <th>coef</th>     <th>std err</th>      <th>t</th>      <th>P>|t|</th>  <th>[0.025</th>    <th>0.975]</th>  \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>const</th> <td> 5.012e+04</td> <td> 6572.353</td> <td>    7.626</td> <td> 0.000</td> <td> 3.69e+04</td> <td> 6.34e+04</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>x1</th>    <td>    0.8057</td> <td>    0.045</td> <td>   17.846</td> <td> 0.000</td> <td>    0.715</td> <td>    0.897</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>x2</th>    <td>   -0.0268</td> <td>    0.051</td> <td>   -0.526</td> <td> 0.602</td> <td>   -0.130</td> <td>    0.076</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>x3</th>    <td>    0.0272</td> <td>    0.016</td> <td>    1.655</td> <td> 0.105</td> <td>   -0.006</td> <td>    0.060</td>\n",
       "</tr>\n",
       "</table>\n",
       "<table class=\"simpletable\">\n",
       "<tr>\n",
       "  <th>Omnibus:</th>       <td>14.838</td> <th>  Durbin-Watson:     </th> <td>   1.282</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Prob(Omnibus):</th> <td> 0.001</td> <th>  Jarque-Bera (JB):  </th> <td>  21.442</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Skew:</th>          <td>-0.949</td> <th>  Prob(JB):          </th> <td>2.21e-05</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Kurtosis:</th>      <td> 5.586</td> <th>  Cond. No.          </th> <td>1.40e+06</td>\n",
       "</tr>\n",
       "</table>"
      ],
      "text/plain": [
       "<class 'statsmodels.iolib.summary.Summary'>\n",
       "\"\"\"\n",
       "                            OLS Regression Results                            \n",
       "==============================================================================\n",
       "Dep. Variable:                      y   R-squared:                       0.951\n",
       "Model:                            OLS   Adj. R-squared:                  0.948\n",
       "Method:                 Least Squares   F-statistic:                     296.0\n",
       "Date:                Wed, 26 Jul 2017   Prob (F-statistic):           4.53e-30\n",
       "Time:                        23:16:49   Log-Likelihood:                -525.39\n",
       "No. Observations:                  50   AIC:                             1059.\n",
       "Df Residuals:                      46   BIC:                             1066.\n",
       "Df Model:                           3                                         \n",
       "Covariance Type:            nonrobust                                         \n",
       "==============================================================================\n",
       "                 coef    std err          t      P>|t|      [0.025      0.975]\n",
       "------------------------------------------------------------------------------\n",
       "const       5.012e+04   6572.353      7.626      0.000    3.69e+04    6.34e+04\n",
       "x1             0.8057      0.045     17.846      0.000       0.715       0.897\n",
       "x2            -0.0268      0.051     -0.526      0.602      -0.130       0.076\n",
       "x3             0.0272      0.016      1.655      0.105      -0.006       0.060\n",
       "==============================================================================\n",
       "Omnibus:                       14.838   Durbin-Watson:                   1.282\n",
       "Prob(Omnibus):                  0.001   Jarque-Bera (JB):               21.442\n",
       "Skew:                          -0.949   Prob(JB):                     2.21e-05\n",
       "Kurtosis:                       5.586   Cond. No.                     1.40e+06\n",
       "==============================================================================\n",
       "\n",
       "Warnings:\n",
       "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
       "[2] The condition number is large, 1.4e+06. This might indicate that there are\n",
       "strong multicollinearity or other numerical problems.\n",
       "\"\"\""
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_opt = X[:, [0, 3, 4, 5]]\n",
    "regressor_OLS = sm.OLS(endog = y, exog = X_opt).fit()\n",
    "regressor_OLS.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"simpletable\">\n",
       "<caption>OLS Regression Results</caption>\n",
       "<tr>\n",
       "  <th>Dep. Variable:</th>            <td>y</td>        <th>  R-squared:         </th> <td>   0.950</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Model:</th>                   <td>OLS</td>       <th>  Adj. R-squared:    </th> <td>   0.948</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Method:</th>             <td>Least Squares</td>  <th>  F-statistic:       </th> <td>   450.8</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Date:</th>             <td>Wed, 26 Jul 2017</td> <th>  Prob (F-statistic):</th> <td>2.16e-31</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Time:</th>                 <td>23:17:26</td>     <th>  Log-Likelihood:    </th> <td> -525.54</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>No. Observations:</th>      <td>    50</td>      <th>  AIC:               </th> <td>   1057.</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Df Residuals:</th>          <td>    47</td>      <th>  BIC:               </th> <td>   1063.</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Df Model:</th>              <td>     2</td>      <th>                     </th>     <td> </td>   \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Covariance Type:</th>      <td>nonrobust</td>    <th>                     </th>     <td> </td>   \n",
       "</tr>\n",
       "</table>\n",
       "<table class=\"simpletable\">\n",
       "<tr>\n",
       "    <td></td>       <th>coef</th>     <th>std err</th>      <th>t</th>      <th>P>|t|</th>  <th>[0.025</th>    <th>0.975]</th>  \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>const</th> <td> 4.698e+04</td> <td> 2689.933</td> <td>   17.464</td> <td> 0.000</td> <td> 4.16e+04</td> <td> 5.24e+04</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>x1</th>    <td>    0.7966</td> <td>    0.041</td> <td>   19.266</td> <td> 0.000</td> <td>    0.713</td> <td>    0.880</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>x2</th>    <td>    0.0299</td> <td>    0.016</td> <td>    1.927</td> <td> 0.060</td> <td>   -0.001</td> <td>    0.061</td>\n",
       "</tr>\n",
       "</table>\n",
       "<table class=\"simpletable\">\n",
       "<tr>\n",
       "  <th>Omnibus:</th>       <td>14.677</td> <th>  Durbin-Watson:     </th> <td>   1.257</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Prob(Omnibus):</th> <td> 0.001</td> <th>  Jarque-Bera (JB):  </th> <td>  21.161</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Skew:</th>          <td>-0.939</td> <th>  Prob(JB):          </th> <td>2.54e-05</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Kurtosis:</th>      <td> 5.575</td> <th>  Cond. No.          </th> <td>5.32e+05</td>\n",
       "</tr>\n",
       "</table>"
      ],
      "text/plain": [
       "<class 'statsmodels.iolib.summary.Summary'>\n",
       "\"\"\"\n",
       "                            OLS Regression Results                            \n",
       "==============================================================================\n",
       "Dep. Variable:                      y   R-squared:                       0.950\n",
       "Model:                            OLS   Adj. R-squared:                  0.948\n",
       "Method:                 Least Squares   F-statistic:                     450.8\n",
       "Date:                Wed, 26 Jul 2017   Prob (F-statistic):           2.16e-31\n",
       "Time:                        23:17:26   Log-Likelihood:                -525.54\n",
       "No. Observations:                  50   AIC:                             1057.\n",
       "Df Residuals:                      47   BIC:                             1063.\n",
       "Df Model:                           2                                         \n",
       "Covariance Type:            nonrobust                                         \n",
       "==============================================================================\n",
       "                 coef    std err          t      P>|t|      [0.025      0.975]\n",
       "------------------------------------------------------------------------------\n",
       "const       4.698e+04   2689.933     17.464      0.000    4.16e+04    5.24e+04\n",
       "x1             0.7966      0.041     19.266      0.000       0.713       0.880\n",
       "x2             0.0299      0.016      1.927      0.060      -0.001       0.061\n",
       "==============================================================================\n",
       "Omnibus:                       14.677   Durbin-Watson:                   1.257\n",
       "Prob(Omnibus):                  0.001   Jarque-Bera (JB):               21.161\n",
       "Skew:                          -0.939   Prob(JB):                     2.54e-05\n",
       "Kurtosis:                       5.575   Cond. No.                     5.32e+05\n",
       "==============================================================================\n",
       "\n",
       "Warnings:\n",
       "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
       "[2] The condition number is large, 5.32e+05. This might indicate that there are\n",
       "strong multicollinearity or other numerical problems.\n",
       "\"\"\""
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_opt = X[:, [0, 3, 5]]\n",
    "regressor_OLS = sm.OLS(endog = y, exog = X_opt).fit()\n",
    "regressor_OLS.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"simpletable\">\n",
       "<caption>OLS Regression Results</caption>\n",
       "<tr>\n",
       "  <th>Dep. Variable:</th>            <td>y</td>        <th>  R-squared:         </th> <td>   0.947</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Model:</th>                   <td>OLS</td>       <th>  Adj. R-squared:    </th> <td>   0.945</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Method:</th>             <td>Least Squares</td>  <th>  F-statistic:       </th> <td>   849.8</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Date:</th>             <td>Wed, 26 Jul 2017</td> <th>  Prob (F-statistic):</th> <td>3.50e-32</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Time:</th>                 <td>23:23:57</td>     <th>  Log-Likelihood:    </th> <td> -527.44</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>No. Observations:</th>      <td>    50</td>      <th>  AIC:               </th> <td>   1059.</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Df Residuals:</th>          <td>    48</td>      <th>  BIC:               </th> <td>   1063.</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Df Model:</th>              <td>     1</td>      <th>                     </th>     <td> </td>   \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Covariance Type:</th>      <td>nonrobust</td>    <th>                     </th>     <td> </td>   \n",
       "</tr>\n",
       "</table>\n",
       "<table class=\"simpletable\">\n",
       "<tr>\n",
       "    <td></td>       <th>coef</th>     <th>std err</th>      <th>t</th>      <th>P>|t|</th>  <th>[0.025</th>    <th>0.975]</th>  \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>const</th> <td> 4.903e+04</td> <td> 2537.897</td> <td>   19.320</td> <td> 0.000</td> <td> 4.39e+04</td> <td> 5.41e+04</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>x1</th>    <td>    0.8543</td> <td>    0.029</td> <td>   29.151</td> <td> 0.000</td> <td>    0.795</td> <td>    0.913</td>\n",
       "</tr>\n",
       "</table>\n",
       "<table class=\"simpletable\">\n",
       "<tr>\n",
       "  <th>Omnibus:</th>       <td>13.727</td> <th>  Durbin-Watson:     </th> <td>   1.116</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Prob(Omnibus):</th> <td> 0.001</td> <th>  Jarque-Bera (JB):  </th> <td>  18.536</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Skew:</th>          <td>-0.911</td> <th>  Prob(JB):          </th> <td>9.44e-05</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Kurtosis:</th>      <td> 5.361</td> <th>  Cond. No.          </th> <td>1.65e+05</td>\n",
       "</tr>\n",
       "</table>"
      ],
      "text/plain": [
       "<class 'statsmodels.iolib.summary.Summary'>\n",
       "\"\"\"\n",
       "                            OLS Regression Results                            \n",
       "==============================================================================\n",
       "Dep. Variable:                      y   R-squared:                       0.947\n",
       "Model:                            OLS   Adj. R-squared:                  0.945\n",
       "Method:                 Least Squares   F-statistic:                     849.8\n",
       "Date:                Wed, 26 Jul 2017   Prob (F-statistic):           3.50e-32\n",
       "Time:                        23:23:57   Log-Likelihood:                -527.44\n",
       "No. Observations:                  50   AIC:                             1059.\n",
       "Df Residuals:                      48   BIC:                             1063.\n",
       "Df Model:                           1                                         \n",
       "Covariance Type:            nonrobust                                         \n",
       "==============================================================================\n",
       "                 coef    std err          t      P>|t|      [0.025      0.975]\n",
       "------------------------------------------------------------------------------\n",
       "const       4.903e+04   2537.897     19.320      0.000    4.39e+04    5.41e+04\n",
       "x1             0.8543      0.029     29.151      0.000       0.795       0.913\n",
       "==============================================================================\n",
       "Omnibus:                       13.727   Durbin-Watson:                   1.116\n",
       "Prob(Omnibus):                  0.001   Jarque-Bera (JB):               18.536\n",
       "Skew:                          -0.911   Prob(JB):                     9.44e-05\n",
       "Kurtosis:                       5.361   Cond. No.                     1.65e+05\n",
       "==============================================================================\n",
       "\n",
       "Warnings:\n",
       "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
       "[2] The condition number is large, 1.65e+05. This might indicate that there are\n",
       "strong multicollinearity or other numerical problems.\n",
       "\"\"\""
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_opt = X[:, [0, 3]]\n",
    "regressor_OLS = sm.OLS(endog = y, exog = X_opt).fit()\n",
    "regressor_OLS.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
